ImapFolderAgent: Only keep a single UID value for each folder in memory.

Previously it used to keep a list of the UIDs of unread mails. Now we
start to assume that UIDs in a folder identified by a UID VALIDITY value
are strictly ascending (monotonically increasing) as suggested by RFC
3501 and 4549 and just keep the highest UID seen in the last run.

This enhancement will help reduce the size of memory typically where
mails are left unread forever.

Akinori MUSHA 10 years ago
parent
commit
6a06a32447
2 changed files with 111 additions and 49 deletions
  1. 90 36
      app/models/agents/imap_folder_agent.rb
  2. 21 13
      spec/models/agents/imap_folder_agent_spec.rb

+ 90 - 36
app/models/agents/imap_folder_agent.rb

@@ -74,13 +74,14 @@ module Agents
74 74
 
75 75
       Set `mark_as_read` to true to mark found mails as read.
76 76
 
77
-      Each agent instance memorizes a list of unread mails that are
78
-      found in the last run, so even if you change a set of conditions
79
-      so that it matches mails that are missed previously, they will
80
-      not show up as new events.  Also, in order to avoid duplicated
81
-      notification it keeps a list of Message-Id's of 100 most recent
82
-      mails, so if multiple mails of the same Message-Id are found,
83
-      you will only see one event out of them.
77
+      Each agent instance memorizes the highest UID of mails that are
78
+      found in the last run for each watched folder, so even if you
79
+      change a set of conditions so that it matches mails that are
80
+      missed previously, or if you unmark already seen mails as read,
81
+      they will not show up as new events.  Also, in order to avoid
82
+      duplicated notification it keeps a list of Message-Id's of 100
83
+      most recent mails, so if multiple mails of the same Message-Id
84
+      are found, you will only see one event out of them.
84 85
     MD
85 86
 
86 87
     event_description <<-MD
@@ -220,22 +221,7 @@ module Agents
220 221
     end
221 222
 
222 223
     def check
223
-      # 'seen' keeps a hash of { uidvalidity => uids, ... } which
224
-      # lists unread mails in watched folders.
225
-      seen = memory['seen'] || {}
226
-      new_seen = Hash.new { |hash, key|
227
-        hash[key] = []
228
-      }
229
-
230
-      # 'notified' keeps an array of message-ids of {IDCACHE_SIZE}
231
-      # most recent notified mails.
232
-      notified = memory['notified'] || []
233
-
234
-      each_unread_mail { |mail|
235
-        new_seen[mail.uidvalidity] << mail.uid
236
-
237
-        next if (uids = seen[mail.uidvalidity]) && uids.include?(mail.uid)
238
-
224
+      each_unread_mail { |mail, notified|
239 225
         body_parts = mail.body_parts(mime_types)
240 226
         matched_part = nil
241 227
         matches = {}
@@ -313,12 +299,6 @@ module Agents
313 299
           mail.mark_as_read
314 300
         end
315 301
       }
316
-
317
-      notified.slice!(0...-IDCACHE_SIZE) if notified.size > IDCACHE_SIZE
318
-
319
-      memory['seen'] = new_seen
320
-      memory['notified'] = notified
321
-      save!
322 302
     end
323 303
 
324 304
     def each_unread_mail
@@ -329,22 +309,47 @@ module Agents
329 309
         log "Logging in as #{username}"
330 310
         imap.login(username, interpolated[:password])
331 311
 
312
+        # 'lastseen' keeps a hash of { uidvalidity => lastseenuid, ... }
313
+        lastseen, seen = self.lastseen, self.make_seen
314
+
315
+        # 'notified' keeps an array of message-ids of {IDCACHE_SIZE}
316
+        # most recent notified mails.
317
+        notified = self.notified
318
+
332 319
         interpolated['folders'].each { |folder|
333 320
           log "Selecting the folder: %s" % folder
334 321
 
335 322
           imap.select(folder)
323
+          uidvalidity = imap.uidvalidity
324
+
325
+          if lastseenuid = lastseen[uidvalidity]
326
+            seen[uidvalidity] = lastseenuid
327
+            uids = imap.uid_fetch((lastseenuid + 1)..-1, 'FLAGS').
328
+                   each_with_object([]) { |data, ret|
329
+              uid, flags = data.attr.values_at('UID', 'FLAGS')
330
+              seen[uidvalidity] = uid
331
+              next if uid <= lastseenuid || flags.include?(:Seen)
332
+              ret << uid
333
+            }
334
+          else
335
+            uids = imap.uid_search('UNSEEN')
336
+            seen[uidvalidity] = uids.max unless uids.empty?
337
+          end
336 338
 
337
-          unseen = imap.search('UNSEEN')
338
-
339
-          if unseen.empty?
339
+          if uids.empty?
340 340
             log "No unread mails"
341 341
             next
342 342
           end
343 343
 
344
-          imap.fetch_mails(unseen).each { |mail|
345
-            yield mail
344
+          imap.uid_fetch_mails(uids).each { |mail|
345
+            yield mail, notified
346 346
           }
347 347
         }
348
+
349
+        self.notified = notified
350
+        self.lastseen = seen
351
+
352
+        save!
348 353
       }
349 354
     ensure
350 355
       log 'Connection closed'
@@ -354,6 +359,27 @@ module Agents
354 359
       interpolated['mime_types'] || %w[text/plain text/enriched text/html]
355 360
     end
356 361
 
362
+    def lastseen
363
+      Seen.new(memory['lastseen'])
364
+    end
365
+
366
+    def lastseen= value
367
+      memory.delete('seen')  # obsolete key
368
+      memory['lastseen'] = value
369
+    end
370
+
371
+    def make_seen
372
+      Seen.new
373
+    end
374
+
375
+    def notified
376
+      Notified.new(memory['notified'])
377
+    end
378
+
379
+    def notified= value
380
+      memory['notified'] = value
381
+    end
382
+
357 383
     private
358 384
 
359 385
     def is_positive_integer?(value)
@@ -376,19 +402,47 @@ module Agents
376 402
         end
377 403
       end
378 404
 
405
+      attr_reader :uidvalidity
406
+
379 407
       def select(folder)
380 408
         ret = super(@folder = folder)
381 409
         @uidvalidity = responses['UIDVALIDITY'].last
382 410
         ret
383 411
       end
384 412
 
385
-      def fetch_mails(set)
386
-        fetch(set, %w[UID RFC822.HEADER]).map { |data|
413
+      def uid_fetch_mails(set)
414
+        uid_fetch(set, 'RFC822.HEADER').map { |data|
387 415
           Message.new(self, data, folder: @folder, uidvalidity: @uidvalidity)
388 416
         }
389 417
       end
390 418
     end
391 419
 
420
+    class Seen < Hash
421
+      def initialize(hash = nil)
422
+        super()
423
+        update(hash) if hash
424
+      end
425
+
426
+      def []=(uidvalidity, uid)
427
+        # Update only if the new value is larger than the current value
428
+        if (curr = self[uidvalidity]).nil? || curr <= uid
429
+          super
430
+        end
431
+      end
432
+    end
433
+
434
+    class Notified < Array
435
+      def initialize(array = nil)
436
+        super()
437
+        replace(array) if array
438
+      end
439
+
440
+      def <<(value)
441
+        slice!(0...-IDCACHE_SIZE) if size > IDCACHE_SIZE
442
+        super
443
+      end
444
+    end
445
+
392 446
     class Message < SimpleDelegator
393 447
       DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html]
394 448
 

+ 21 - 13
spec/models/agents/imap_folder_agent_spec.rb

@@ -53,7 +53,15 @@ describe Agents::ImapFolderAgent do
53 53
       ]
54 54
 
55 55
       stub(@checker).each_unread_mail.returns { |yielder|
56
-        @mails.each(&yielder)
56
+        seen = @checker.lastseen
57
+        notified = @checker.notified
58
+        @mails.each_with_object(notified) { |mail|
59
+          yielder[mail, notified]
60
+          seen[mail.uidvalidity] = mail.uid
61
+        }
62
+        @checker.lastseen = seen
63
+        @checker.notified = notified
64
+        nil
57 65
       }
58 66
 
59 67
       @payloads = [
@@ -139,9 +147,9 @@ describe Agents::ImapFolderAgent do
139 147
     describe '#check' do
140 148
       it 'should check for mails and save memory' do
141 149
         lambda { @checker.check }.should change { Event.count }.by(2)
142
-        @checker.memory['notified'].sort.should == @mails.map(&:message_id).sort
143
-        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
144
-          (seen[mail.uidvalidity] ||= []) << mail.uid
150
+        @checker.notified.sort.should == @mails.map(&:message_id).sort
151
+        @checker.lastseen.should == @mails.each_with_object(@checker.make_seen) { |mail, seen|
152
+          seen[mail.uidvalidity] = mail.uid
145 153
         }
146 154
 
147 155
         Event.last(2).map(&:payload) == @payloads
@@ -153,9 +161,9 @@ describe Agents::ImapFolderAgent do
153 161
         @checker.options['conditions']['to'] = 'John.Doe@*'
154 162
 
155 163
         lambda { @checker.check }.should change { Event.count }.by(1)
156
-        @checker.memory['notified'].sort.should == [@mails.first.message_id]
157
-        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
158
-          (seen[mail.uidvalidity] ||= []) << mail.uid
164
+        @checker.notified.sort.should == [@mails.first.message_id]
165
+        @checker.lastseen.should == @mails.each_with_object(@checker.make_seen) { |mail, seen|
166
+          seen[mail.uidvalidity] = mail.uid
159 167
         }
160 168
 
161 169
         Event.last.payload.should == @payloads.first
@@ -170,9 +178,9 @@ describe Agents::ImapFolderAgent do
170 178
         )
171 179
 
172 180
         lambda { @checker.check }.should change { Event.count }.by(1)
173
-        @checker.memory['notified'].sort.should == [@mails.last.message_id]
174
-        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
175
-          (seen[mail.uidvalidity] ||= []) << mail.uid
181
+        @checker.notified.sort.should == [@mails.last.message_id]
182
+        @checker.lastseen.should == @mails.each_with_object(@checker.make_seen) { |mail, seen|
183
+          seen[mail.uidvalidity] = mail.uid
176 184
         }
177 185
 
178 186
         Event.last.payload.should == @payloads.last.update(
@@ -208,9 +216,9 @@ describe Agents::ImapFolderAgent do
208 216
         )
209 217
 
210 218
         lambda { @checker.check }.should_not change { Event.count }
211
-        @checker.memory['notified'].sort.should == []
212
-        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
213
-          (seen[mail.uidvalidity] ||= []) << mail.uid
219
+        @checker.notified.sort.should == []
220
+        @checker.lastseen.should == @mails.each_with_object(@checker.make_seen) { |mail, seen|
221
+          seen[mail.uidvalidity] = mail.uid
214 222
         }
215 223
       end
216 224